import string
from random import random

from pyspark.sql import *
from pyspark.sql.types import *
import pyspark.sql.functions as f

if __name__ == '__main__':
    # Obtain a Spark session running in local mode ("local[*]" master) and
    # keep a handle on its SparkContext for building RDDs.
    builder = SparkSession.builder.appName("test1_dataFrame_create").master("local[*]")
    spark = builder.getOrCreate()
    sc = spark.sparkContext

    # Three single-element rows become a DataFrame with one column, "num".
    rows = [[1], [2], [3]]
    rdd = sc.parallelize(rows)
    df = spark.createDataFrame(rdd, ["num"])
    df.show()

    def process(num: int) -> dict:
        """Pair a number with the ASCII letter found at that index.

        Args:
            num: Zero-based index into ``string.ascii_letters``. May be
                ``None`` when the function is applied to a null value.

        Returns:
            A dict with two keys: ``"num"`` holds the input unchanged and
            ``"letters"`` holds the letter at index ``num``. ``"letters"``
            is ``None`` when ``num`` is ``None``, negative, or past the end
            of ``string.ascii_letters``.
        """
        letters = string.ascii_letters
        # Indexing with None raises TypeError and an index past the end raises
        # IndexError; a negative index would silently wrap around to the end
        # of the alphabet. Map all of these to a null letter instead.
        if num is None or not 0 <= num < len(letters):
            letter = None
        else:
            letter = letters[num]
        return {"num": num, "letters": letter}

    # Dict-returning UDF: the struct field names declared here must match,
    # one-to-one, the keys of the dict returned by process.
    returnType = StructType([
        StructField("num", IntegerType(), True),
        StructField("letters", StringType(), True),
    ])

    # Register the function under the SQL name "udf1"; the object returned by
    # register() is what the DSL call below invokes.
    udf1 = spark.udf.register("udf1", process, returnType)

    # DSL style: call the returned UDF object on a Column.
    struct_col = udf1(df["num"])
    df.select(struct_col).show(truncate=False)

    # SQL style: reference the UDF by its registered name in an expression.
    df.selectExpr("udf1(num)").show(truncate=False)

    # {'numm': 1, 'letterss': 'a'}
    # print({"numm":1,"letterss":"a"})




